{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prática - 19/10\n",
    "## Regressão Linear Múltipla e Regressão Polinomial"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. Implemente o coeficiente de determinação ajustado\n",
    "2. Faça uma regressão polinomial no dataset aerogerador.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Prints the interpreter-provided __doc__ string. NOTE(review): looks like a\n",
    "# script-template leftover that says nothing about this analysis -- confirm\n",
    "# and consider removing.\n",
    "print(__doc__)\n",
    "\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from sklearn import linear_model\n",
    "\n",
    "#Questão 1\n",
    "def total_sum_of_squares(y):\n",
    "    mean_y = np.mean(y)\n",
    "    return sum((v-mean_y)**2 for v in y)\n",
    "\n",
    "def r_squared(y,yb):\n",
    "    \"\"\"Coefficient of determination: R^2 = 1 - SS_res / SS_tot.\n",
    "\n",
    "    y  -- observed (true) target values\n",
    "    yb -- values predicted by the model\n",
    "\n",
    "    The argument order matters: SS_tot is computed from ``y`` only.\n",
    "    Both inputs are converted to flat float arrays, so plain lists work and\n",
    "    an (n, 1) input cannot broadcast against an (n,) one.\n",
    "    \"\"\"\n",
    "    y = np.asarray(y, dtype=float).ravel()\n",
    "    yb = np.asarray(yb, dtype=float).ravel()\n",
    "    ss_res = np.sum((y - yb) ** 2)\n",
    "    return 1.0 - ss_res / total_sum_of_squares(y)\n",
    "\n",
    "def adjusted_r_squared(y,yb,p):\n",
    "    \"\"\"Adjusted R^2 = 1 - (SS_res / (n - p)) / (SS_tot / (n - 1)).\n",
    "\n",
    "    y  -- observed (true) target values\n",
    "    yb -- values predicted by the model\n",
    "    p  -- number of parameters (regression coefficients)\n",
    "\n",
    "    n is the number of samples, taken from ``y``.\n",
    "\n",
    "    Raises ValueError when n <= p, because the residual degrees of freedom\n",
    "    (n - p) would be zero or negative and the statistic is undefined.\n",
    "    \"\"\"\n",
    "    y = np.asarray(y, dtype=float).ravel()\n",
    "    yb = np.asarray(yb, dtype=float).ravel()\n",
    "    n = len(y)\n",
    "    if n <= p:\n",
    "        raise ValueError(\"adjusted R-squared needs more samples (%d) than parameters (%d)\" % (n, p))\n",
    "    ss_res = np.sum((y - yb) ** 2)\n",
    "    # float() keeps both divisions in floating point under Python 2 as well.\n",
    "    return 1.0 - (ss_res / float(n - p)) / (total_sum_of_squares(y) / float(n - 1))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Questão 2\n",
    "def expand_features(x,k):\n",
    "    n = len(x)\n",
    "    ones = np.ones(n).reshape(-1,1)\n",
    "    x = x.reshape(-1,1)\n",
    "    polynomial_x = np.append(ones,x,axis=1) #axis=1 append de coluna\n",
    "    for i in range(2,k+1):\n",
    "        polynomial_x = np.append(polynomial_x,x**i,axis=1)\n",
    "    return polynomial_x\n",
    "    \n",
    "# Load the comma-separated dataset; column 0 is used as the input feature\n",
    "# and column 1 as the target.\n",
    "data = np.loadtxt(\"aerogerador.txt\",delimiter=\",\")\n",
    "\n",
    "# Fix the seed so the shuffle, and therefore the train/test split and every\n",
    "# reported metric, is reproducible after Restart Kernel -> Run All.\n",
    "SEED = 42\n",
    "np.random.seed(SEED)\n",
    "rdata = np.random.permutation(data)\n",
    "X = rdata[:,0]\n",
    "y = rdata[:,1]\n",
    "\n",
    "k = 4\n",
    "expanded_X = expand_features(X,k)\n",
    "# shape is (number of rows, number of columns)\n",
    "print(expanded_X.shape)\n",
    "\n",
    "# First 80% of the shuffled rows for training, the remainder for testing.\n",
    "nt = int(len(expanded_X) * 0.8)\n",
    "X_train = expanded_X[:nt,:]\n",
    "X_test = expanded_X[nt:,:]\n",
    "y_train = y[:nt]\n",
    "y_test = y[nt:]\n",
    "\n",
    "# NOTE(review): expanded_X already carries a column of ones, while\n",
    "# LinearRegression fits its own intercept by default, so that column is\n",
    "# redundant -- consider fit_intercept=False or dropping the column.\n",
    "regr = linear_model.LinearRegression()\n",
    "regr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "yb = regr.predict(X_test)\n",
    "# The coefficients (single-argument print so Python 2 does not show a tuple)\n",
    "print('Coefficients: \\n %s' % ([regr.coef_ , regr.intercept_],))\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((yb - y_test) ** 2))\n",
    "# The metric helpers take (true values, predictions) in that order; passing\n",
    "# them swapped would compute the total sum of squares from the predictions.\n",
    "print(\"R-squared: %.2f\" % r_squared(y_test, yb))\n",
    "print(\"Adjusted R-squared: %.2f\" % adjusted_r_squared(y_test, yb, k+1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Plot outputs\n",
    "# Column 1 of the expanded matrix is the raw feature (column 0 is the ones).\n",
    "x1 = X_test[:,1]\n",
    "plt.scatter(x1, y_test,  color='black')\n",
    "# Sort x and the predictions with the same permutation so every prediction\n",
    "# stays paired with its own x; sorting them independently would distort the\n",
    "# curve wherever the fitted polynomial is not monotone.\n",
    "order = np.argsort(x1)\n",
    "plt.plot(x1[order], yb[order], color='blue',linewidth=3)\n",
    "\n",
    "plt.xticks(())\n",
    "plt.yticks(())\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
